Setup
| world_rank | university_name | country | teaching | international | research | citations | income | total_score | num_students | student_staff_ratio | international_students | female_male_ratio | year | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Harvard University | United States of America | 99.7 | 72.4 | 98.7 | 98.8 | 34.5 | 96.1 | 20,152 | 8.9 | 25% | NaN | 2011 | 
| 1 | 2 | California Institute of Technology | United States of America | 97.7 | 54.6 | 98.0 | 99.9 | 83.7 | 96.0 | 2,243 | 6.9 | 27% | 33 : 67 | 2011 | 
| 2 | 3 | Massachusetts Institute of Technology | United States of America | 97.8 | 82.3 | 91.4 | 99.9 | 87.5 | 95.6 | 11,074 | 9.0 | 33% | 37 : 63 | 2011 | 
| 3 | 4 | Stanford University | United States of America | 98.3 | 29.5 | 98.1 | 99.2 | 64.3 | 94.3 | 15,596 | 7.8 | 22% | 42 : 58 | 2011 | 
| 4 | 5 | Princeton University | United States of America | 90.9 | 70.3 | 95.4 | 99.9 | - | 94.2 | 7,929 | 8.4 | 27% | 45 : 55 | 2011 | 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2603 entries, 0 to 2602
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   world_rank              2603 non-null   object 
 1   university_name         2603 non-null   object 
 2   country                 2603 non-null   object 
 3   teaching                2603 non-null   float64
 4   international           2603 non-null   object 
 5   research                2603 non-null   float64
 6   citations               2603 non-null   float64
 7   income                  2603 non-null   object 
 8   total_score             2603 non-null   object 
 9   num_students            2544 non-null   object 
 10  student_staff_ratio     2544 non-null   float64
 11  international_students  2536 non-null   object 
 12  female_male_ratio       2370 non-null   object 
 13  year                    2603 non-null   int64  
dtypes: float64(4), int64(1), object(9)
memory usage: 284.8+ KB
Line Plot
Code
# Line plot of citation score against world rank for the first 100 rows.
# NOTE(review): the title mentions "Teaching" but only citations are plotted
# here - confirm whether a second (teaching) trace was intended.

# Step 1: take the first 100 rows of the rankings table.
df = timesData.head(100)

# Step 2: build the single trace and wrap it in the data list.
trace1 = go.Scatter(
    x=df["world_rank"],   # x coordinates
    y=df["citations"],    # y coordinates
    mode="lines",         # draw a connected line, no markers
)
data = [trace1]

# Step 3: layout as a plain dict; go.Layout(title=...) is the object form.
layout = {"title": "Citation and Teaching"}

# Step 4: figure as a plain dict; go.Figure(data=..., layout=...) is the
# object form, which is displayed with fig.show() instead of iplot().
fig = {"data": data, "layout": layout}

# Step 5: render the figure.
iplot(fig)
#fig.show()
add markers and text
Code
# Line + marker plot of citation score against world rank, with the
# university name attached to every point as its text.

# Step 1: first 100 rows of the rankings table.
df = timesData.head(100)

# Step 2: trace and data list.
trace = go.Scatter(
    x=df["world_rank"],
    y=df["citations"],
    mode="lines+markers",                       # line with a marker per point
    marker={"color": "rgba(16,112,2,0.8)"},     # green, 0.8 alpha
    text=df["university_name"],                 # per-point text
)
data = [trace]

# Step 3: layout with a chart title and an x-axis title.
layout = go.Layout(
    title="citation",
    xaxis={"title": "World Rank", "ticklen": 5},
)

# Step 4: assemble the figure.
fig = go.Figure(data=data, layout=layout)
#5 plot figure
fig.show()
version2(go.Layout / go.Figure를 쓰는 방식)가 더 나은 것 같다.
Scatter Plot
Scatter plot과 line plot은 둘 다 go.Scatter 객체를 사용한다. 다른 점은 mode 설정뿐이다.
Code
# Scatter plot of citation score against world rank for the 2014 ranking.

# 1. Data frame: first 100 rows of the 2014 ranking.
df2014 = timesData[timesData.year == 2014].iloc[:100, :]

# 2. Trace and data list: markers only, so no connecting line is drawn.
trace = go.Scatter(
    x=df2014.world_rank,   # x coordinates
    y=df2014.citations,    # y coordinates
    mode="markers",
)
data = [trace]

# 3. Layout: chart title only.
layout = go.Layout(title="Citation vs world rank")

# 4. Assemble the figure.
fig = go.Figure(data=data, layout=layout)
#5. plot figure
fig.show()
add markers and text
Code
# Scatter plot of the 2014 ranking with a semi-transparent marker colour and
# the university name attached to every point as its text.

# Step 1: first 100 rows of the 2014 ranking.
is_2014 = timesData.year == 2014
df2014 = timesData[is_2014].head(100)

# Step 2: trace and data list.
# Transparency is set through the alpha channel of the rgba colour; the
# alternative is marker=dict(color="green", opacity=0.8).
trace = go.Scatter(
    x=df2014["world_rank"],
    y=df2014["citations"],
    mode="markers",
    marker={"color": "rgba(255,128,2,0.8)"},
    text=df2014["university_name"],
)
data = [trace]

# Step 3: layout with a title on each axis.
layout = go.Layout(
    xaxis={"title": "World Rank"},
    yaxis={"title": "Citation"},
)

# Step 4: assemble the figure.
fig = go.Figure(data=data, layout=layout)
#5 plot
fig.show()
Histogram
Code
여러 개의 차트 겹쳐 그리기
- 여기서는 histogram으로 했으나 다른차트들도 가능
 
Code
# Two overlaid histograms of the student/staff ratio: 2011 versus 2012.

# 1. Data: the student_staff_ratio column restricted to each year.
x2011 = timesData.loc[timesData.year == 2011, "student_staff_ratio"]
x2012 = timesData.loc[timesData.year == 2012, "student_staff_ratio"]

# 2. Traces and data list.
# `name` is the label used for the trace in the legend. Opacity is set inside
# `marker` here; it could also be passed at trace level (opacity=0.7).
trace1 = go.Histogram(
    x=x2011,
    name="2011",
    marker={"color": "rgb(171,50,96)", "opacity": 0.7},
)
trace2 = go.Histogram(
    x=x2012,
    name="2012",
    marker={"color": "blue", "opacity": 0.7},
)
data = [trace1, trace2]

# 3. Layout: barmode="overlay" draws the traces on top of each other.
layout = go.Layout(
    barmode="overlay",
    xaxis={"title": "students-staff ratio"},
    yaxis={"title": "count"},
    title={"text": "histogram", "x": 0.5},
)

# 4. Assemble the figure.
fig = go.Figure(data=data, layout=layout)
fig.show()
참고자료 - Opacity와 alpha? : opacity는 marker 안팎에서 모두 쓸 수 있고, alpha는 rgba 색상을 쓸 때만 입력하며 둘은 같은 역할을 한다. 단, opacity를 marker 밖(trace 수준)에서 지정하면 같은 trace 안에서는 겹침에 따른 밀도가 표현되지 않는다. 서로 다른 trace끼리 겹칠 때에는 밀도가 표현된다.
Code
# Plot research, citations, income and total score of the 2015 ranking
# against world rank, one column per cell of a 2x2 subplot grid.
import pandas as pd

# 1. Data frame: rows of the 2015 ranking only.
dataframe = timesData[timesData.year == 2015]

# 2. Traces and data list: one line trace per score column, in the order
#    named by the figure title.
score_columns = ["research", "citations", "income", "total_score"]
data = [
    go.Scatter(
        x=dataframe["world_rank"],
        # income and total_score are stored as text and use "-" for missing
        # values; coerce to numbers so the y-axis is numeric (missing -> NaN).
        y=pd.to_numeric(dataframe[column], errors="coerce"),
        mode="lines",
        name=column,  # legend label
    )
    for column in score_columns
]

# 3. Layout: make_subplots already assigns each cell its own axis pair and
#    domain, so only the title has to be supplied.
layout = dict(
    title="Research, citation, income and total score VS World Rank of Universities"
)

# 4. Figure: 2x2 grid of subplots with the layout applied.
n_cols = 2
fig = make_subplots(rows=2, cols=n_cols)
fig.update_layout(layout)

# 5. Place the traces left-to-right, top-to-bottom (grid indices are 1-based).
for index, trace in enumerate(data):
    grid_row, grid_col = divmod(index, n_cols)
    fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)
fig.show()
Code
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# 2x2 grid whose cells have different subplot types; each trace is placed in
# the cell whose declared type matches it.
subplot_specs = [
    [{"type": "xy"}, {"type": "polar"}],
    [{"type": "domain"}, {"type": "scene"}],
]
fig = make_subplots(rows=2, cols=2, specs=subplot_specs)

# (trace, row, col) triples, listed in the order they are added.
placements = [
    (go.Bar(y=[2, 3, 1]), 1, 1),
    (go.Barpolar(theta=[0, 45, 90], r=[2, 3, 1]), 1, 2),
    (go.Pie(values=[2, 3, 1]), 2, 1),
    (
        go.Scatter3d(x=[2, 3, 1], y=[0, 0, 0], z=[0.5, 1, 2], mode="lines"),
        2,
        2,
    ),
]
for subplot_trace, grid_row, grid_col in placements:
    fig.add_trace(subplot_trace, row=grid_row, col=grid_col)

fig.update_layout(height=700, showlegend=False)
fig.show()
Vector
Vector field(quiver plot)
사전준비
- np.meshgrid : x좌표,y좌표를 가지는 벡터를 입력했을때, 두 벡터로 만들 수 있는 격자의 좌표(x,y)를 출력
 
Code
(10,) (10,)
(10, 10) (10, 10)
- 격자(grid,matrix)에 함수 적용하면? => matrix(x,y 각각의 좌표)의 모든 요소에 함수가 적용됨
 
- 배열의 요소 값 차례대로 읽어보기 …
 
(0,0),(0.2,0),(0.4,0) … (1.8,0) => (0,0.2),(0.2,0.2),(0.4,0.2)…
 x좌표 다 읽고 y좌표증가 그 다음 x좌표 다 읽고 y좌표 증가 …
(array([[0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8],
        [0. , 0.2, 0.4, 0.6, 0.8, 1. , 1.2, 1.4, 1.6, 1.8]]),
 array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
        [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
        [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4],
        [0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6, 0.6],
        [0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
        [1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. , 1. ],
        [1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2],
        [1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4],
        [1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6, 1.6],
        [1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8]]))
Gradient Vector Field
\(f(x, y) = xe^{-x^2-y^2}\), \(\nabla f = \left((1-2x^2)e^{-x^2-y^2},\; -2xy\,e^{-x^2-y^2}\right)\)
Code
(array([[-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        ...,
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ],
        [-1.        , -0.97979798, -0.95959596, ...,  0.95959596,
          0.97979798,  1.        ]]),
 array([[-1.        , -1.        , -1.        , ..., -1.        ,
         -1.        , -1.        ],
        [-0.97979798, -0.97979798, -0.97979798, ..., -0.97979798,
         -0.97979798, -0.97979798],
        [-0.95959596, -0.95959596, -0.95959596, ..., -0.95959596,
         -0.95959596, -0.95959596],
        ...,
        [ 0.95959596,  0.95959596,  0.95959596, ...,  0.95959596,
          0.95959596,  0.95959596],
        [ 0.97979798,  0.97979798,  0.97979798, ...,  0.97979798,
          0.97979798,  0.97979798],
        [ 1.        ,  1.        ,  1.        , ...,  1.        ,
          1.        ,  1.        ]]))
1. 시점
- 종점은 화살표로 표시해야 하므로 시점만 만들기
 
Code
# 1. Prepare data.
# Coordinates are stored pairwise: an even index holds a vector's start point
# and the following odd index holds its end point.
#   first vector:  start x[0], y[0], z[0] -> end x[1], y[1], z[1]
#   second vector: start x[2], y[2], z[2] -> end x[3], y[3], z[3]
# Index pairs (start, end): (0, 1), (2, 3), (4, 5), (6, 7)
x = [
    10.1219, 10.42579,
    15.21396, 15.42468,
    20.29639, 20.46268,
    25.36298, 25.49156,
]
y = [
    5.0545, 5.180104,
    5.0545, 5.20337,
    5.0545, 5.194271,
    5.0545, 5.231627,
]
z = [
    5.2713, 5.231409,
    5.2713, 5.231409,
    5.2713, 5.235852,
    5.2713, 5.231627,
]
[coord for coord in range(0,len(x),2)]
[0, 2, 4, 6]
Code
# 2. Trace and data list: plot only the start point of each vector.
#    Start points sit at the even indices (0, 2, 4, ...), so a step-2 slice
#    selects them.
trace1 = go.Scatter3d(
    x=x[::2],
    y=y[::2],
    z=z[::2],
    mode="markers",
    # The trace draws markers only, so the colour belongs on `marker`;
    # a `line` colour has nothing to apply to in this mode.
    marker=dict(color="red"),
)
data = [trace1]

# 3. Layout: chart title only.
layout = go.Layout(title=dict(text="vectors"))

# 4. Assemble the figure.
fig = go.Figure(data=data, layout=layout)
fig.show()
2. 선 만들기
Code
# 1. Prepare data: per-axis coordinate lists for the line segments.
# In "lines" mode a Scatter trace joins consecutive points. Each vector
# contributes its start point and its end point, followed by a None entry.
# No point is created for the None entry, so no line is drawn from one
# vector's end point to the next vector's start point.
x_lines, y_lines, z_lines = [], [], []
for index, point in enumerate(zip(x, y, z)):
    is_end_point = index % 2 == 1
    for axis_values, coordinate in zip((x_lines, y_lines, z_lines), point):
        axis_values.append(coordinate)
        if is_end_point:
            axis_values.append(None)  # break the line after every end point

# 2. Trace and data list: thin red line segments.
segment_style = {"width": 2, "color": "rgb(255, 0,0)"}
trace2 = go.Scatter3d(
    x=x_lines,
    y=y_lines,
    z=z_lines,
    mode="lines",
    line=segment_style,
)
data = [trace2]

# 3. Layout: chart title only.
layout = go.Layout(title="lines")

# 4. Assemble the figure.
fig = go.Figure(data=data, layout=layout)
#5.plotting
fig.show()
3. 종점 만들기
Code
Code
import plotly.graph_objs as go
# plotly.offline.init_notebook_mode()
# Coordinates are stored pairwise; each (start, end) tuple in `pairs` holds
# the indices of one vector's two points.
x = [10.1219, 10.42579, 15.21396, 15.42468, 20.29639, 20.46268, 25.36298, 25.49156]
y = [5.0545, 5.180104, 5.0545, 5.20337, 5.0545, 5.194271, 5.0545, 5.231627]
z = [5.2713, 5.231409, 5.2713, 5.231409, 5.2713, 5.235852, 5.2713, 5.231627]
pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]

# Plot ONLY the first point of each pair.
trace1 = go.Scatter3d(
    x=[x[start] for start, _ in pairs],
    y=[y[start] for start, _ in pairs],
    z=[z[start] for start, _ in pairs],
    mode='markers',
    name='markers',
    # The trace draws markers only, so the colour belongs on `marker`;
    # a `line` colour has nothing to apply to in this mode.
    marker=dict(color='red'),
)
go.Figure(data=trace1)